library(rgdal)
library(foreign)
library(data.table)
library(rhandsontable)
library(spdep)
library(tmap)
# Load districts and attributes
g2 <- readOGR("./data", "svyMaps_2016.06.22_sara")
OGR data source with driver: ESRI Shapefile
Source: "./data", layer: "svyMaps_2016.06.22_sara"
with 2078 features
It has 11 fields
dt2 <- read.dta("./data/SSApoverty_Dist_forGWR.12.dta")
# Keep STATA labels
dt2.lbl <- data.table(varCode=names(dt2), varLabel=attr(dt2, "var.labels"))
setkey(dt2.lbl, varCode)
dt2 <- data.table(dt2)
# Merge attributes to shapefile
g2.dt <- data.table(g2@data)
g2.dt[, rn22 := as.character(rn)]
g2.dt[, rn := row.names(g2)]
# Recode Ethiopia woredas
dt2[svyCode=="eth2010", svyL2Cd := svyL1Cd * 10000 + svyL2Cd]
setkey(g2.dt, ISO3, svyCode, svyL1Cd, svyL2Cd)
setkey(dt2, ISO3, svyCode, svyL1Cd, svyL2Cd)
# Any unmatched?
dt2[!g2.dt][, .N, by=svyCode]
Empty data.table (0 rows) of 2 cols: svyCode,N
# Remove `rn` from Sara's vars
dt2[, rn := NULL]
# Seems okay, so merge
g2.dt <- dt2[g2.dt]
names(g2.dt)[names(g2.dt) %like% "rn"]
[1] "rn" "rn22"
# Neighbor list
coords <- coordinates(g2)
nb2 <- poly2nb(g2, row.names=paste(g2$ISO3, g2$rn, sep="."))
nb2
Neighbour list object:
Number of regions: 2078
Number of nonzero links: 10902
Percentage nonzero weights: 0.2524731
Average number of links: 5.246391
3 regions with no links:
MOZ.424 MWI.757 MWI.780
Plot contiguities for e.g. Ghana
# Plot contiguities (e.g. GHA)
gha <- g2[g2$ISO3=="GHA",]
gha.dt <- g2.dt[ISO3=="GHA"]
gha <- SpatialPolygonsDataFrame(gha, data.frame(gha.dt), match.ID="rn")
coords.gha <- coordinates(gha)
nb2.gha <- poly2nb(gha)
plot(gha, border="grey",
main=list("Contiguity - Ghana (170 districts)"))
plot(nb2.gha, coords.gha, col="blue", add=T)
legend("bottomleft",
legend=capture.output(summary(nb2.gha))[1:7], cex=.6)
# Spatial weights for SSA (and Ghana only)
#w2 <- nb2listw(nb2)
w.gha <- nb2listw(nb2.gha)
# Explanatory vars
var <- c("spei_lt", "L1_speihishock", "L1_speiloshock")
# Plot to identify districts with missing values (Ghana only)
tmap_mode("view")
qtm(gha, fill=var[1])
# Impute missing values with regional median
tmp <- gha.dt[, lapply(.SD, median, na.rm=T), by=svyL1Cd, .SDcols=var]
setnames(tmp, 2:4, paste(names(tmp), "imp", sep="_"))
setkey(tmp, svyL1Cd)
setkey(gha.dt, svyL1Cd)
gha.dt <- tmp[gha.dt]
gha.dt[is.na(spei_lt), spei_lt := spei_lt_imp]
gha.dt[is.na(L1_speihishock), L1_speihishock := L1_speihishock_imp]
gha.dt[is.na(L1_speiloshock), L1_speiloshock := L1_speiloshock_imp]
# Spatial weights for SSA (and Ghana only)
#w2 <- nb2listw(nb2)
w.gha <- nb2listw(nb2.gha)
# Explanatory vars
var <- c("spei_lt", "L1_speihishock", "L1_speiloshock")
# Plot to identify districts with missing values (Ghana only)
tmap_mode("view")
tmap mode set to interactive viewing
qtm(gha, fill=var[1], colorNA="grey", textNA="Missing")